# All-inclusive model: regress sold_price on every other column of
# data_factor_core.
lm_pre_alpha <- lm(sold_price ~ ., data = data_factor_core)
summ(lm_pre_alpha)

# Testing for heteroskedasticity
# a. Graphically: draw the lm diagnostic plots on a 2 x 2 grid
par(mfrow = c(2, 2))
plot(lm_pre_alpha)
# autoplot(lm_pre_alpha)

# b. Statistically
ols_test_breusch_pagan(lm_pre_alpha) # Breusch-Pagan test

# - Resolving heteroskedasticity using a heteroskedasticity-consistent (HC)
#   variance-covariance matrix
# Compare models: the plain OLS fit next to the same coefficients tested with
# HC0 and HC1 covariance matrices.
# `vcov.` is spelled out in full (coeftest() has no `vcov` argument; the short
# form only worked through partial matching). The former `method = "White2"`
# argument is dropped: vcovHC() on an lm fit has no `method` parameter, so it
# was silently swallowed and never affected the result.
stargazer(
  lm_pre_alpha,
  coeftest(lm_pre_alpha, vcov. = vcovHC(lm_pre_alpha, type = "HC0")),
  coeftest(lm_pre_alpha, vcov. = vcovHC(lm_pre_alpha, type = "HC1")),
  type = "text"
)
Note: Advisor suggested not to include interaction terms except for specific testing.
# Age
# Smoothed sold_price against age: one smoother per infections_period (the
# ribbon fill distinguishes the periods) plus a dashed smoother over all rows.
a <- ggplot(data = data_factor, mapping = aes(x = age, y = sold_price)) +
  geom_smooth(mapping = aes(fill = infections_period)) +
  geom_smooth(color = "grey32", linetype = "dashed") +
  scale_fill_manual(
    name = "Infection Period",
    labels = c("Pre", "Post"),
    values = c(very_low, med)
  ) +
  ggtitle("Age and Price") +
  xlab("Age") +
  ylab("Price") +
  theme_minimal()
print(a)
`geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
`geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
# Actual vs. fit
# Same specification as the all-inclusive model, extended with a squared age
# term so the age effect is allowed to be non-linear.
lm_pre_alpha_age <- lm(
  formula = sold_price ~ . + I(age^2),
  data = data_factor_core
)
summ(lm_pre_alpha_age)
MODEL INFO:
Observations: 24394 (18 missing obs. deleted)
Dependent Variable: sold_price
Type: OLS linear regression
MODEL FIT:
F(66,24327) = 36748.75, p = 0.00
R² = 0.99
Adj. R² = 0.99
Standard errors: OLS
-------------------------------------------------------------------------
Est. S.E. t val. p
----------------------------------- ----------- --------- -------- ------
(Intercept) -12663.15 9510.30 -1.33 0.18
property_typeDUP -1420.31 2871.50 -0.49 0.62
property_typeOTH -2648.78 2053.45 -1.29 0.20
property_typePAT -626.11 929.50 -0.67 0.50
property_typeSGL 1784.81 437.71 4.08 0.00
property_typeTNH 510.26 551.55 0.93 0.35
ac_typenone -83.17 380.75 -0.22 0.83
ac_typenot_central -1707.03 245.87 -6.94 0.00
list_price 0.98 0.00 888.10 0.00
patio1 775.22 126.90 6.11 0.00
school_general1 151.15 161.81 0.93 0.35
photo_count -29.40 7.65 -3.84 0.00
pool1 -91.33 211.57 -0.43 0.67
roof_typeother 1123.91 232.86 4.83 0.00
roof_typeshingle 1815.51 262.56 6.91 0.00
roof_typeslate 404.62 1113.88 0.36 0.72
gas_typenatural 4180.15 8533.24 0.49 0.62
gas_typenone 3729.55 8529.10 0.44 0.66
gas_typepropane -124.79 8729.36 -0.01 0.99
gas_typeunknown 3388.93 8528.19 0.40 0.69
out_building1 -424.03 137.78 -3.08 0.00
area_living -0.82 0.27 -3.01 0.00
land_acres -305.11 154.40 -1.98 0.05
appliances1 850.31 172.71 4.92 0.00
garage1 623.51 127.04 4.91 0.00
property_conditionnew -4181.83 789.37 -5.30 0.00
property_conditionother -425.40 169.02 -2.52 0.01
energy_efficient1 589.01 141.61 4.16 0.00
exterior_typemetal -78.23 402.23 -0.19 0.85
exterior_typeother 35.68 167.52 0.21 0.83
exterior_typevinyl 390.50 185.92 2.10 0.04
exterior_typewood -646.79 262.80 -2.46 0.01
exterior_featurescourtyard 2427.56 1466.46 1.66 0.10
exterior_featuresfence 1028.53 614.35 1.67 0.09
exterior_featuresnone 1539.28 615.57 2.50 0.01
exterior_featuresporch 950.44 629.21 1.51 0.13
exterior_featurestennis_court 536.63 1724.79 0.31 0.76
fireplace1 408.15 131.51 3.10 0.00
foundation_typeslab 1016.20 191.35 5.31 0.00
foundation_typeunspecified -110.61 229.01 -0.48 0.63
area_total -0.15 0.16 -0.97 0.33
beds_total1 -441.27 3175.53 -0.14 0.89
beds_total2 -837.36 3145.00 -0.27 0.79
beds_total3 -195.13 3148.38 -0.06 0.95
beds_total4 639.15 3154.44 0.20 0.84
beds_total5 -183.17 3212.66 -0.06 0.95
bath_full1 2051.07 3355.33 0.61 0.54
bath_full2 2540.17 3355.08 0.76 0.45
bath_full3 2065.05 3363.12 0.61 0.54
bath_full4 -2648.80 3755.25 -0.71 0.48
bath_full6 -5631.95 9199.84 -0.61 0.54
bath_half1 -295.30 166.82 -1.77 0.08
bath_half2 -1640.99 1098.85 -1.49 0.14
bath_half3 1510.31 6029.59 0.25 0.80
bath_half4 8533.97 8532.40 1.00 0.32
bath_half5 -8590.00 4932.13 -1.74 0.08
age -124.18 11.06 -11.23 0.00
dom -7.97 1.08 -7.37 0.00
sold_date 0.17 0.07 2.64 0.01
sewer_typeseptic -185.34 237.12 -0.78 0.43
sewer_typeunspecified 275.03 129.35 2.13 0.03
property_stylenot_mobile 2262.11 353.36 6.40 0.00
subdivision1 396.83 151.52 2.62 0.01
water_typewell 641.52 599.64 1.07 0.28
waterfront1 -1671.69 225.43 -7.42 0.00
bottom25_dom1 2367.79 158.88 14.90 0.00
I(age^2) 1.16 0.14 8.32 0.00
-------------------------------------------------------------------------
# Marginal effects data frames: predictions over age from the model without
# (ggpredict_1) and with (ggpredict_2) the squared age term.
ggpredict_1 <- ggpredict(lm_pre_alpha, terms = "age")
ggpredict_2 <- ggpredict(lm_pre_alpha_age, terms = "age")

# Plots
# b: smoothed actual prices (grey) together with both sets of predictions.
b <- ggplot(data = data_factor_core, mapping = aes(x = age)) +
  geom_smooth(mapping = aes(y = sold_price), color = "grey50") +
  geom_smooth(
    data = ggpredict_1, mapping = aes(x = x, y = predicted),
    color = very_low, linetype = "dashed"
  ) +
  geom_smooth(
    data = ggpredict_2, mapping = aes(x = x, y = predicted),
    color = med, linetype = "dashed"
  ) +
  ggtitle("Age and Price") +
  xlab("Age") +
  ylab("Prediction")

# c: the two prediction curves alone (age vs. age + age^2), so the y-axis is
# scaled to the predictions only.
c <- ggplot() +
  geom_smooth(
    data = ggpredict_1, mapping = aes(x = x, y = predicted),
    color = very_low, linetype = "dashed"
  ) +
  geom_smooth(
    data = ggpredict_2, mapping = aes(x = x, y = predicted),
    color = med, linetype = "dashed"
  ) +
  ggtitle("Age and Price") +
  xlab("Age") +
  ylab("Prediction")

# Show the general age plot again for reference.
print(a)
`geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
`geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
# Stack plots b and c vertically (two rows, one column).
gridExtra::grid.arrange(grobs = list(b, c), nrow = 2, ncol = 1)
`geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
`geom_smooth()` using method = 'loess' and formula 'y ~ x'
`geom_smooth()` using method = 'loess' and formula 'y ~ x'
`geom_smooth()` using method = 'loess' and formula 'y ~ x'
`geom_smooth()` using method = 'loess' and formula 'y ~ x'
# Living Area
# General graphing
# Smoothed sold_price against area_living: one smoother per infections_period
# (distinguished by ribbon fill) plus a dashed smoother over all rows.
a <- ggplot(
  data = data_factor,
  mapping = aes(x = area_living, y = sold_price)
) +
  geom_smooth(mapping = aes(fill = infections_period)) +
  geom_smooth(color = "grey32", linetype = "dashed") +
  scale_fill_manual(
    name = "Infection Period",
    labels = c("Pre", "Post"),
    values = c(very_low, med)
  ) +
  ggtitle("Living Area and Price") +
  xlab("Living Area") +
  ylab("Price") +
  theme_minimal()
print(a)
`geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
`geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
# Price per unit of living area against living area: raw points and a
# smoother coloured by infections_period, plus a dashed smoother over all rows.
ggplot(
  data = data_factor,
  mapping = aes(x = area_living, y = sold_price / area_living)
) +
  geom_point(mapping = aes(color = infections_period), alpha = 0.15) +
  geom_smooth(mapping = aes(color = infections_period)) +
  geom_smooth(linetype = "dashed", color = "grey50") +
  theme_minimal()
`geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
`geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
# Actual vs. fit
# Same specification as the all-inclusive model, extended with a squared
# living-area term so the area effect is allowed to be non-linear.
lm_pre_alpha_area <- lm(
  formula = sold_price ~ . + I(area_living^2),
  data = data_factor_core
)
summ(lm_pre_alpha_area)
MODEL INFO:
Observations: 24394 (18 missing obs. deleted)
Dependent Variable: sold_price
Type: OLS linear regression
MODEL FIT:
F(66,24327) = 36741.79, p = 0.00
R² = 0.99
Adj. R² = 0.99
Standard errors: OLS
-------------------------------------------------------------------------
Est. S.E. t val. p
----------------------------------- ----------- --------- -------- ------
(Intercept) -21671.88 9522.17 -2.28 0.02
property_typeDUP -1333.48 2871.86 -0.46 0.64
property_typeOTH -2804.77 2053.59 -1.37 0.17
property_typePAT -620.44 929.59 -0.67 0.50
property_typeSGL 1770.31 437.77 4.04 0.00
property_typeTNH 370.43 551.95 0.67 0.50
ac_typenone 62.25 381.06 0.16 0.87
ac_typenot_central -1498.05 246.37 -6.08 0.00
list_price 0.98 0.00 896.13 0.00
patio1 798.99 126.79 6.30 0.00
school_general1 241.58 161.60 1.49 0.13
photo_count -34.70 7.62 -4.55 0.00
pool1 -73.45 211.70 -0.35 0.73
roof_typeother 1098.57 233.01 4.71 0.00
roof_typeshingle 1920.08 261.94 7.33 0.00
roof_typeslate 536.02 1113.83 0.48 0.63
gas_typenatural 4855.78 8534.04 0.57 0.57
gas_typenone 4318.56 8530.00 0.51 0.61
gas_typepropane 87.56 8730.21 0.01 0.99
gas_typeunknown 3979.77 8529.01 0.47 0.64
out_building1 -490.59 137.56 -3.57 0.00
area_living 6.54 0.95 6.85 0.00
land_acres -285.71 154.41 -1.85 0.06
appliances1 921.60 172.47 5.34 0.00
garage1 666.84 126.78 5.26 0.00
property_conditionnew -3617.20 784.80 -4.61 0.00
property_conditionother -364.93 168.83 -2.16 0.03
energy_efficient1 601.45 141.63 4.25 0.00
exterior_typemetal 16.32 402.32 0.04 0.97
exterior_typeother 58.29 167.52 0.35 0.73
exterior_typevinyl 417.26 185.92 2.24 0.02
exterior_typewood -554.23 262.89 -2.11 0.04
exterior_featurescourtyard 2805.14 1465.90 1.91 0.06
exterior_featuresfence 1048.09 614.40 1.71 0.09
exterior_featuresnone 1584.20 615.59 2.57 0.01
exterior_featuresporch 1119.15 628.89 1.78 0.08
exterior_featurestennis_court 870.69 1724.92 0.50 0.61
fireplace1 264.36 131.42 2.01 0.04
foundation_typeslab 819.18 189.82 4.32 0.00
foundation_typeunspecified -213.55 228.49 -0.93 0.35
area_total -0.27 0.16 -1.71 0.09
beds_total1 -1072.82 3176.15 -0.34 0.74
beds_total2 -2553.13 3149.94 -0.81 0.42
beds_total3 -2327.60 3156.66 -0.74 0.46
beds_total4 -1389.50 3161.99 -0.44 0.66
beds_total5 -1954.03 3218.17 -0.61 0.54
bath_full1 3642.81 3358.78 1.08 0.28
bath_full2 3719.16 3356.69 1.11 0.27
bath_full3 3740.67 3367.07 1.11 0.27
bath_full4 -544.43 3761.07 -0.14 0.88
bath_full6 -3367.38 9198.35 -0.37 0.71
bath_half1 -274.20 167.01 -1.64 0.10
bath_half2 -1480.96 1099.19 -1.35 0.18
bath_half3 1451.22 6030.19 0.24 0.81
bath_half4 7762.17 8533.71 0.91 0.36
bath_half5 -8041.27 4933.05 -1.63 0.10
age -37.00 3.75 -9.87 0.00
dom -8.28 1.08 -7.66 0.00
sold_date 0.28 0.06 4.35 0.00
sewer_typeseptic -304.75 236.80 -1.29 0.20
sewer_typeunspecified 258.97 129.37 2.00 0.05
property_stylenot_mobile 2105.89 353.77 5.95 0.00
subdivision1 401.26 151.53 2.65 0.01
water_typewell 557.86 599.60 0.93 0.35
waterfront1 -1642.44 225.40 -7.29 0.00
bottom25_dom1 2331.25 158.82 14.68 0.00
I(area_living^2) -0.00 0.00 -8.04 0.00
-------------------------------------------------------------------------
# Single-variable fit: sold_price explained by area_living alone, with no
# other controls.
lm_pre_alpha_area_single <- lm(
  formula = sold_price ~ area_living,
  data = data_factor_core
)
summ(lm_pre_alpha_area_single)
MODEL INFO:
Observations: 24412
Dependent Variable: sold_price
Type: OLS linear regression
MODEL FIT:
F(1,24410) = 14244.19, p = 0.00
R² = 0.37
Adj. R² = 0.37
Standard errors: OLS
-------------------------------------------------------
Est. S.E. t val. p
----------------- ----------- --------- -------- ------
(Intercept) -20238.66 1644.55 -12.31 0.00
area_living 113.16 0.95 119.35 0.00
-------------------------------------------------------
# Marginal effects data frames: predictions over area_living from each model.
ggpredict_1 <- ggpredict(lm_pre_alpha, terms = "area_living") # total model
ggpredict_2 <- ggpredict(lm_pre_alpha_area, terms = "area_living") # non-linear addition
# NOTE(review): ggpredict_3 is computed here but not drawn in either plot below.
ggpredict_3 <- ggpredict(lm_pre_alpha_area_single, terms = "area_living") # single-variable fit

# Plots
# b: smoothed actual prices (grey) together with the predictions of the total
# model and of the model with the squared term. The actual-price smoother now
# inherits data_factor_core from ggplot() instead of switching to data_factor,
# so the "actual" curve is drawn from the same data the models were fitted on
# (as the corresponding age plot already does).
b <- ggplot(data_factor_core, aes(x = area_living)) +
  geom_smooth(mapping = aes(y = sold_price), color = "grey50") +
  geom_smooth(
    data = ggpredict_1, mapping = aes(x, predicted),
    linetype = "dashed", color = very_low
  ) +
  geom_smooth(
    data = ggpredict_2, mapping = aes(x, predicted),
    linetype = "dashed", color = med
  ) +
  labs(
    title = "Living Area and Price",
    x = "Living Area",
    y = "Prediction"
  )

# Look at area_living & area_living^2 alone to see the impact on a more
# relevant y-axis scale
c <- ggplot() +
  geom_smooth(
    data = ggpredict_1, mapping = aes(x, predicted),
    linetype = "dashed", color = very_low
  ) +
  geom_smooth(
    data = ggpredict_2, mapping = aes(x, predicted),
    linetype = "dashed", color = med
  ) +
  labs(
    title = "Living Area and Price",
    x = "Living Area",
    y = "Prediction"
  )

# Conclusion
a
`geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
`geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
# Stack plots b and c vertically (two rows, one column).
gridExtra::grid.arrange(grobs = list(b, c), nrow = 2, ncol = 1)
`geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
`geom_smooth()` using method = 'loess' and formula 'y ~ x'
`geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
`geom_smooth()` using method = 'loess' and formula 'y ~ x'
`geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
# General graphing
# Sold price against lot size: raw points and a smoother coloured by
# infections_period, plus a dashed smoother over all rows.
ggplot(
  data = data_factor,
  mapping = aes(x = land_acres, y = sold_price)
) +
  geom_point(mapping = aes(color = infections_period), alpha = 0.15) +
  geom_smooth(mapping = aes(color = infections_period)) +
  geom_smooth(linetype = "dashed", color = "grey50") +
  theme_minimal()

# Same layout with price per acre on the y-axis.
# NOTE(review): rows with land_acres == 0 would give non-finite values here -
# confirm whether such rows exist.
ggplot(
  data = data_factor,
  mapping = aes(x = land_acres, y = sold_price / land_acres)
) +
  geom_point(mapping = aes(color = infections_period), alpha = 0.15) +
  geom_smooth(mapping = aes(color = infections_period)) +
  geom_smooth(linetype = "dashed", color = "grey50") +
  theme_minimal()
# Additions
# Work on a copy of the modelling data and add the squared terms as plain
# numeric columns. I() is only meaningful inside a model formula; wrapping a
# stored column in it merely tags the column with class "AsIs", so it is
# dropped here. The squares are taken from the copy itself so the block reads
# from and writes to a single data frame.
data_factor_core_clean <- data_factor_core
data_factor_core_clean$age_2 <- data_factor_core_clean$age^2
data_factor_core_clean$area_living_2 <- data_factor_core_clean$area_living^2
# Full model summary
summ(lm_pre_alpha)

# Check Variance Inflation Factors (VIF) and aliased (linearly dependent)
# terms of the full model
VIF(lm_pre_alpha)
alias(lm_pre_alpha)

# Total area and living area are found to be significantly (i.e. VIF > 5)
# multicollinear (expected)
# Solution: Remove area_total
# Note the significant drop in R^2 from 0.99 to 0.86
# NOTE(review): this model also switches the response to log(sold_price), so
# the R^2 above is not measured on the same scale as the full model's -
# confirm the log transform is intended here.
lm_pre_alpha_cleaned <- lm(
  log(sold_price) ~ . - area_total,
  data = data_factor_core
)
summ(lm_pre_alpha_cleaned)

# Final check on the cleaned model. VIF() is called once; the original ran
# the identical call twice in a row.
VIF(lm_pre_alpha_cleaned)
alias(lm_pre_alpha_cleaned)
# Multicollinearity can also be inspected visually with the mcvis package.
# Start from the numeric columns only (dplyr).
data_numeric <- select_if(.tbl = data_factor_core, .predicate = is.numeric)
mcvis_result <- mcvis(X = data_numeric)
a <- plot(mcvis_result)
par(mfrow = c(2, 2))

# Removal 1: drop list_price and re-run
data_numeric <- subset(data_numeric, select = -list_price)
mcvis_result <- mcvis(X = data_numeric)
b <- plot(mcvis_result)

# Removal 2: additionally drop area_total and re-run
data_numeric <- subset(data_numeric, select = -area_total)
mcvis_result <- mcvis(X = data_numeric)
c <- plot(mcvis_result)

# Show the three results in order: all numeric columns, without list_price,
# without list_price and area_total.
print(a)
print(b)
print(c)

# Removals applied to the cleaned modelling data
# - Area_total
# - Listing price
par(mfrow = c(2, 2))
data_factor_core_clean <- subset(
  data_factor_core_clean,
  select = -c(area_total, list_price)
)
# NOTE(review): row 23515 is dropped by hard-coded position; the reason is not
# shown here, and the index silently points at a different row if the data is
# re-ordered or filtered upstream - confirm.
data_factor_core_clean <- data_factor_core_clean[-23515, ]
# Start a 5-worker PSOCK cluster and register it as the parallel backend.
# NOTE(review): no stopCluster(cl) is visible in this part of the script -
# confirm the cluster is shut down later.
cl <- makePSOCKcluster(names = 5)
registerDoParallel(cl = cl)

# Regression table for lm_alpha with Wald confidence intervals.
tab_model(lm_alpha, ci_method = "wald")
Profiled confidence intervals may take longer time to compute. Use 'ci_method="wald"' for faster computation of CIs.
# Waves of infection
# infections_3mma by sale date for 2020-2021, with a dashed smoother and a
# vertical marker line at 2020-03-23.
# The y-axis upper limit is taken from the plotted column itself
# (data_factor$infections_3mma) with missing values ignored. The original
# wrote max(infections_3mma), which is evaluated outside the plot data and
# therefore only worked if a free-standing object of that name existed, and
# returned NA whenever the vector contained a missing value.
ggplot(data_factor, aes(x = as.Date(sold_date), y = infections_3mma)) +
  geom_point(color = low, alpha = 0.7) +
  geom_smooth(linetype = "dashed", color = med) +
  theme_minimal() +
  scale_x_date(limits = as.Date(c("2020-01-01", "2021-12-31"))) +
  scale_y_continuous(
    limits = c(0, max(data_factor$infections_3mma, na.rm = TRUE))
  ) +
  xlab(" ") +
  ylab("Confirmed Infections per Day") +
  labs(
    title = "Waves of Infection",
    caption = ""
  ) +
  geom_vline(xintercept = as.numeric(as.Date("2020-03-23")), linetype = 4)
`geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
Warning: Removed 17731 rows containing non-finite values (stat_smooth).
Warning: Removed 17731 rows containing missing values (geom_point).
Warning: Removed 3 rows containing missing values (geom_smooth).
# Accumulation of infections
# infections_accum (in thousands) by sale date for 2020-2021, with a dashed
# smoother.
# Two fixes relative to the original:
# - I() is removed from aes(): newer ggplot2 releases treat I()-wrapped
#   position aesthetics as "as is" panel-relative values that bypass the
#   scale, which would break this plot; plain arithmetic is all that is needed.
# - The y-axis upper limit is computed from data_factor$infections_accum with
#   na.rm = TRUE instead of from a free-standing infections_accum object.
ggplot(
  data_factor,
  aes(x = as.Date(sold_date), y = infections_accum / 1000)
) +
  geom_point(color = low, alpha = 0.7) +
  geom_smooth(linetype = "dashed", color = med) +
  theme_minimal() +
  scale_x_date(limits = as.Date(c("2020-01-01", "2021-12-31"))) +
  scale_y_continuous(
    limits = c(0, max(data_factor$infections_accum / 1000, na.rm = TRUE))
  ) +
  xlab(" ") +
  ylab("Accumulation of Infections (in 000's)") +
  labs(
    title = "Accumulation of Infections",
    caption = ""
  )
`geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
Warning: Removed 17731 rows containing non-finite values (stat_smooth).
Warning: Removed 17731 rows containing missing values (geom_point).
Warning: Removed 3 rows containing missing values (geom_smooth).
# Infections and home prices
# Smoothed sold_price against infections_3mma (in thousands).
# Two fixes relative to the original:
# - I() is removed from aes(): newer ggplot2 releases treat I()-wrapped
#   position aesthetics as "as is" panel-relative values that bypass the scale.
# - The x-axis upper limit is computed from data_factor$infections_3mma with
#   na.rm = TRUE instead of from a free-standing infections_3mma object.
ggplot(data_factor, aes(x = infections_3mma / 1000, y = sold_price)) +
  # geom_point() +
  geom_smooth(linetype = "dashed", color = med) +
  theme_minimal() +
  scale_x_continuous(
    limits = c(0, max(data_factor$infections_3mma / 1000, na.rm = TRUE))
  ) +
  xlab("3-Month Moving Average of Daily Infections (in 000's)") +
  ylab("Sold Price (Actual)") +
  labs(
    title = "Infections and Price",
    caption = ""
  )
`geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
# "#ff6c67", "#00c2c6"
# Violin + box plot of sold price (in thousands) by infections_period, drawn
# horizontally via coord_flip().
# The scale_fill_viridis() call of the original is removed: it was replaced by
# the scale_fill_manual() added further down the same chain, so it had no
# effect on the plot and only triggered ggplot2's "Scale for 'fill' is already
# present" message.
# NOTE(review): caption = "e" looks like a leftover keystroke - confirm the
# intended caption text.
ggplot(
  data_factor,
  aes(x = infections_period, y = sold_price / 1000, fill = infections_period)
) +
  geom_violin(alpha = 0.5) +
  geom_boxplot(width = 0.1) +
  coord_flip() +
  theme_ipsum() +
  theme(legend.position = "none") +
  xlab("Infections Present (1 = yes)") +
  ylab("Sold Price (in 000's)") +
  scale_fill_manual(values = c(very_low, med)) +
  labs(
    title = "Comparison of Sold Price",
    caption = "e"
  )
Scale for 'fill' is already present. Adding another scale for 'fill', which will replace the existing scale.
# Plots
# Model fit overview over infections_3mma: smoothed actual prices (grey) with
# two dashed prediction curves.
# NOTE(review): ggpredict_1 / ggpredict_2 are presumably re-computed for
# infections_3mma before this point - confirm.
ggplot(data = data_factor_core, mapping = aes(x = infections_3mma)) +
  # Actual data
  geom_smooth(mapping = aes(y = sold_price), color = "grey50") +
  # Controlled model
  geom_smooth(
    data = ggpredict_1, mapping = aes(x = x, y = predicted),
    color = low, linetype = "dashed"
  ) +
  # Best single fit
  geom_smooth(
    data = ggpredict_2, mapping = aes(x = x, y = predicted),
    color = med, linetype = "dashed"
  ) +
  labs(title = "Model Fit Overview")
`geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
`geom_smooth()` using method = 'loess' and formula 'y ~ x'
`geom_smooth()` using method = 'loess' and formula 'y ~ x'
# Draw the stored plots a through e, each on its own page, in order.
invisible(lapply(list(a, b, c, d, e), gridExtra::grid.arrange))
Ideas
# Coefficient tests for lm_corona_bedrooms using an HC0
# heteroskedasticity-consistent covariance matrix. `vcov.` is spelled out in
# full; the original `vcov =` only reached it through partial argument
# matching.
# NOTE(review): `method = "White2"` is kept from the original call; if
# lm_corona_bedrooms is a plain lm fit, vcovHC() presumably ignores it -
# confirm and drop.
coeftest(
  lm_corona_bedrooms,
  vcov. = vcovHC(lm_corona_bedrooms, method = "White2", type = "HC0")
)
t test of coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 2.3414e+05 3.0157e+04 7.7640 8.553e-15 ***
ac_typenone -5.4498e+04 1.9525e+03 -27.9111 < 2.2e-16 ***
ac_typenot_central -2.2087e+04 1.6931e+03 -13.0457 < 2.2e-16 ***
patio1 1.2472e+04 8.3412e+02 14.9527 < 2.2e-16 ***
school_general1 8.2759e+03 1.0945e+03 7.5614 4.129e-14 ***
photo_count 1.3248e+03 4.9480e+01 26.7740 < 2.2e-16 ***
pool1 1.9075e+04 1.5001e+03 12.7159 < 2.2e-16 ***
roof_typeother 7.3681e+03 1.4621e+03 5.0392 4.708e-07 ***
roof_typeshingle 2.7197e+04 1.6956e+03 16.0396 < 2.2e-16 ***
roof_typeslate 1.5496e+04 9.0682e+03 1.7088 0.0874995 .
gas_typenatural -1.0756e+05 3.4559e+03 -31.1233 < 2.2e-16 ***
gas_typenone -1.3865e+05 2.2971e+03 -60.3594 < 2.2e-16 ***
gas_typepropane -9.3236e+04 1.8180e+04 -5.1285 2.943e-07 ***
gas_typeunknown -1.3842e+05 2.1427e+03 -64.5988 < 2.2e-16 ***
out_building1 -5.5192e+03 8.8805e+02 -6.2149 5.218e-10 ***
appliances1 2.5898e+04 1.1928e+03 21.7118 < 2.2e-16 ***
property_conditionnew -2.0935e+04 6.3471e+03 -3.2983 0.0009741 ***
property_conditionother -2.0956e+04 1.0429e+03 -20.0948 < 2.2e-16 ***
energy_efficient1 1.8928e+04 8.8970e+02 21.2746 < 2.2e-16 ***
exterior_typemetal -4.0964e+03 2.4309e+03 -1.6852 0.0919667 .
exterior_typeother 1.3327e+04 1.1559e+03 11.5302 < 2.2e-16 ***
exterior_typevinyl 3.0630e+03 1.2148e+03 2.5213 0.0116992 *
exterior_typewood 6.8287e+02 1.8873e+03 0.3618 0.7174878
exterior_featurescourtyard 3.8981e+04 1.4928e+04 2.6113 0.0090249 **
exterior_featuresfence -2.3394e+04 5.4658e+03 -4.2800 1.876e-05 ***
exterior_featuresnone -1.3995e+04 5.4825e+03 -2.5528 0.0106928 *
exterior_featuresporch -2.0091e+04 5.5495e+03 -3.6203 0.0002948 ***
exterior_featurestennis_court 2.3977e+04 1.3892e+04 1.7260 0.0843658 .
fireplace1 3.1903e+04 8.3534e+02 38.1915 < 2.2e-16 ***
foundation_typeslab 2.0170e+04 1.3210e+03 15.2687 < 2.2e-16 ***
foundation_typeunspecified 9.7919e+03 1.4755e+03 6.6362 3.286e-11 ***
beds_total1 -7.1968e+04 2.9707e+04 -2.4226 0.0154172 *
beds_total2 -5.4848e+04 2.9462e+04 -1.8616 0.0626666 .
beds_total3 -2.8699e+04 2.9463e+04 -0.9741 0.3300308
beds_total4 1.0509e+04 2.9483e+04 0.3564 0.7215164
beds_total5 1.7689e+04 3.0042e+04 0.5888 0.5559999
age -2.1730e+03 8.0380e+01 -27.0343 < 2.2e-16 ***
dom 8.3326e+00 6.9823e+00 1.1934 0.2327235
sewer_typeseptic -4.5359e+03 1.5203e+03 -2.9836 0.0028514 **
sewer_typeunspecified -4.4686e+03 8.1555e+02 -5.4792 4.314e-08 ***
property_stylenot_mobile 7.2464e+04 1.8129e+03 39.9708 < 2.2e-16 ***
subdivision1 2.7805e+03 9.7726e+02 2.8452 0.0044416 **
water_typewell -3.2549e+03 4.4298e+03 -0.7348 0.4624852
waterfront1 2.7540e+04 1.6343e+03 16.8507 < 2.2e-16 ***
bottom25_dom1 1.2415e+04 1.0822e+03 11.4716 < 2.2e-16 ***
age_2 1.9417e+01 1.1018e+00 17.6236 < 2.2e-16 ***
data_factor$infections_3mma -2.8687e+01 1.5003e+01 -1.9121 0.0558708 .
beds_total1:data_factor$infections_3mma 2.5212e+01 1.5454e+01 1.6314 0.1028173
beds_total2:data_factor$infections_3mma 3.2225e+01 1.5051e+01 2.1411 0.0322755 *
beds_total3:data_factor$infections_3mma 3.6766e+01 1.5014e+01 2.4488 0.0143384 *
beds_total4:data_factor$infections_3mma 3.7085e+01 1.5053e+01 2.4637 0.0137574 *
beds_total5:data_factor$infections_3mma 4.7405e+01 1.6056e+01 2.9525 0.0031552 **
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# Mean sold price per infections_period group (missing prices ignored); the
# result has one row per group with the mean in column price_mean.
# NOTE(review): attaching plyr after dplyr masks several dplyr verbs
# (e.g. summarise, mutate) - confirm the load order is intended.
library(plyr)
price_means <- ddply(
  .data = data_factor,
  .variables = "infections_period",
  .fun = summarise,
  price_mean = mean(sold_price, na.rm = TRUE)
)
# Distribution: Total
# Density of sold_price over all sales with a dashed line at the overall mean.
# The mean is computed directly from data_factor$sold_price (missing values
# ignored) and passed as a constant. The original evaluated mean(sold_price)
# inside aes() against price_means, which has no sold_price column, so it
# depended on a free-standing sold_price object being available.
ggplot(data_factor, aes(x = sold_price)) +
  geom_density(alpha = 0.5, position = "identity", fill = very_low) +
  ggtitle("Price Distribution") +
  geom_vline(
    xintercept = mean(data_factor$sold_price, na.rm = TRUE),
    linetype = "dashed", size = 0.4, color = very_low, alpha = 0.8
  ) +
  xlab("Sold Price") +
  ylab("Density")
# Distribution: Infection
# Overlaid sold_price densities per infections_period, with a dashed line at
# each group's mean (second row of price_means in `med`, first row in
# `very_low`).
# The group means are passed as plain constants taken from the price_mean
# column. The original routed them through aes() with data = price_means and
# positional [row, col] indexing, which drew each line once per row of
# price_means and depended on the column order of that data frame.
ggplot(data_factor, aes(x = sold_price, fill = infections_period)) +
  geom_density(alpha = 0.5, position = "identity") +
  ggtitle("Price Distributions") +
  geom_vline(
    xintercept = price_means$price_mean[2],
    linetype = "dashed", size = 0.4, color = med, alpha = 0.8
  ) +
  geom_vline(
    xintercept = price_means$price_mean[1],
    linetype = "dashed", size = 0.4, color = very_low, alpha = 0.8
  ) +
  scale_fill_manual(
    values = c(very_low, med),
    name = "Infection Period",
    labels = c("Pre", "Post")
  ) +
  xlab("Sold Price") +
  ylab("Density") +
  labs(fill = "Infection Period")
# Distribution: Top vs. Bottom
# sold_price densities per infections_period, faceted in rows by the
# combination of top25_sold_price and bottom25_sold_price with free scales.
# Only one scale_fill_manual() is kept: the original added a values-only fill
# scale first and then a second, fully specified one, which replaced the first
# and triggered ggplot2's "Scale for 'fill' is already present" message.
ggplot(data_factor) +
  geom_density(
    aes(x = sold_price, fill = infections_period),
    alpha = 0.5, position = "identity"
  ) +
  facet_grid(vars(top25_sold_price, bottom25_sold_price), scales = "free") +
  ggtitle("Price Distributions") +
  xlab("Sold Price") +
  labs(fill = "Infection Period") +
  ylab("Density") +
  scale_fill_manual(
    values = c(very_low, med),
    name = "Infection Period",
    labels = c("Pre", "Post")
  )
# Price and Infections
# Violin + box plot of sold_price by infections_period, drawn horizontally
# via coord_flip().
# The scale_fill_viridis() call of the original is removed: it was replaced by
# the scale_fill_manual() added later in the same chain, so it had no effect
# on the plot and only triggered ggplot2's "Scale for 'fill' is already
# present" message.
ggplot(
  data_factor,
  aes(x = infections_period, y = sold_price, fill = infections_period)
) +
  geom_violin(alpha = 0.5) +
  geom_boxplot(width = 0.1) +
  coord_flip() +
  theme_ipsum() +
  theme(
    legend.position = "none",
    plot.title = element_text(size = 11)
  ) +
  ggtitle("Comparison of Sold Price") +
  xlab("Infection Period") +
  scale_fill_manual(values = c(very_low, med)) +
  ylab("Sold Price")
# Coefficient tests for lm_corona_price_bottom using an HC0
# heteroskedasticity-consistent covariance matrix. `vcov.` is spelled out in
# full; the original `vcov =` only reached it through partial argument
# matching.
# NOTE(review): `method = "White2"` is kept from the original call; if
# lm_corona_price_bottom is a plain lm fit, vcovHC() presumably ignores it -
# confirm and drop.
coeftest(
  lm_corona_price_bottom,
  vcov. = vcovHC(lm_corona_price_bottom, method = "White2", type = "HC0")
)
t test of coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 2.6017e+05 2.1950e+04 11.8531 < 2.2e-16 ***
property_typeDUP -2.1684e+04 1.5731e+04 -1.3784 0.1680918
property_typeOTH 7.3064e+03 8.1447e+03 0.8971 0.3696882
property_typePAT 9.6853e+03 4.8857e+03 1.9824 0.0474454 *
property_typeSGL 1.8003e+04 2.2688e+03 7.9351 2.194e-15 ***
property_typeTNH -4.5428e+03 2.8170e+03 -1.6126 0.1068353
ac_typenone -2.4963e+04 1.3345e+03 -18.7056 < 2.2e-16 ***
ac_typenot_central -3.5920e+03 1.2040e+03 -2.9833 0.0028541 **
patio1 4.1811e+03 6.4168e+02 6.5158 7.372e-11 ***
school_general1 6.8812e+03 8.4726e+02 8.1217 4.811e-16 ***
photo_count 5.9194e+02 3.8741e+01 15.2795 < 2.2e-16 ***
pool1 1.0669e+04 1.2351e+03 8.6380 < 2.2e-16 ***
roof_typeother -3.3318e+02 1.1443e+03 -0.2912 0.7709303
roof_typeshingle 1.1199e+04 1.3489e+03 8.3025 < 2.2e-16 ***
roof_typeslate 2.6920e+03 7.2137e+03 0.3732 0.7090169
gas_typenatural -7.3831e+04 2.9774e+03 -24.7972 < 2.2e-16 ***
gas_typenone -1.0662e+05 2.0419e+03 -52.2141 < 2.2e-16 ***
gas_typepropane -6.9963e+04 1.4886e+04 -4.7000 2.616e-06 ***
gas_typeunknown -1.0796e+05 1.9714e+03 -54.7669 < 2.2e-16 ***
out_building1 -6.4916e+03 6.8667e+02 -9.4537 < 2.2e-16 ***
area_living -6.9603e+00 5.2515e+00 -1.3254 0.1850514
land_acres 1.9796e+03 7.4583e+02 2.6542 0.0079554 **
appliances1 1.0788e+04 8.4929e+02 12.7019 < 2.2e-16 ***
garage1 6.8509e+03 6.3401e+02 10.8056 < 2.2e-16 ***
property_conditionnew -8.4140e+03 5.3106e+03 -1.5844 0.1131184
property_conditionother -1.0277e+04 8.3230e+02 -12.3471 < 2.2e-16 ***
energy_efficient1 1.0499e+04 7.0947e+02 14.7989 < 2.2e-16 ***
exterior_typemetal -8.7770e+02 1.8545e+03 -0.4733 0.6360072
exterior_typeother 7.9162e+03 8.8607e+02 8.9341 < 2.2e-16 ***
exterior_typevinyl 1.8030e+03 9.2587e+02 1.9474 0.0515005 .
exterior_typewood 2.4987e+03 1.3755e+03 1.8166 0.0692898 .
exterior_featurescourtyard 2.3714e+04 1.2638e+04 1.8764 0.0606085 .
exterior_featuresfence -2.4910e+04 4.2901e+03 -5.8063 6.466e-09 ***
exterior_featuresnone -2.0388e+04 4.2886e+03 -4.7539 2.007e-06 ***
exterior_featuresporch -2.4831e+04 4.3365e+03 -5.7260 1.040e-08 ***
exterior_featurestennis_court 2.3169e+03 1.0105e+04 0.2293 0.8186463
fireplace1 1.0708e+04 6.8071e+02 15.7301 < 2.2e-16 ***
foundation_typeslab 4.5790e+03 1.0141e+03 4.5155 6.348e-06 ***
foundation_typeunspecified 2.5181e+03 1.0937e+03 2.3023 0.0213285 *
beds_total1 -8.8157e+03 2.0732e+04 -0.4252 0.6706851
beds_total2 -1.8691e+04 2.0643e+04 -0.9055 0.3652225
beds_total3 -2.6547e+04 2.0670e+04 -1.2843 0.1990422
beds_total4 -2.0696e+04 2.0696e+04 -1.0000 0.3173106
beds_total5 -3.5958e+04 2.1091e+04 -1.7049 0.0882328 .
bath_full1 -1.4452e+04 1.3423e+04 -1.0766 0.2816580
bath_full2 -7.5442e+03 1.3405e+04 -0.5628 0.5735714
bath_full3 1.4197e+04 1.3524e+04 1.0497 0.2938466
bath_full4 7.3672e+03 2.0346e+04 0.3621 0.7172836
bath_full6 4.0238e+04 1.4399e+04 2.7945 0.0052018 **
bath_half1 1.1964e+04 9.6620e+02 12.3824 < 2.2e-16 ***
bath_half2 2.3815e+04 7.0879e+03 3.3599 0.0007808 ***
bath_half3 5.8803e+04 9.6367e+03 6.1020 1.063e-09 ***
bath_half4 1.0372e+05 2.8037e+03 36.9928 < 2.2e-16 ***
bath_half5 -2.7354e+04 2.1637e+04 -1.2642 0.2061652
age -1.5156e+03 6.6548e+01 -22.7751 < 2.2e-16 ***
dom -1.1463e+01 5.3153e+00 -2.1567 0.0310414 *
sewer_typeseptic -6.1909e+03 1.1518e+03 -5.3749 7.732e-08 ***
sewer_typeunspecified -4.5154e+03 6.2787e+02 -7.1916 6.591e-13 ***
property_stylenot_mobile 2.8815e+04 1.5438e+03 18.6650 < 2.2e-16 ***
subdivision1 2.4505e+03 7.3353e+02 3.3407 0.0008370 ***
water_typewell 2.4004e+03 3.2270e+03 0.7438 0.4569754
waterfront1 1.7312e+04 1.2796e+03 13.5289 < 2.2e-16 ***
bottom25_dom1 7.6382e+03 8.1707e+02 9.3483 < 2.2e-16 ***
age_2 1.3323e+01 9.1771e-01 14.5177 < 2.2e-16 ***
area_living_2 1.5910e-02 1.5353e-03 10.3627 < 2.2e-16 ***
data_factor$infections_3mma 7.8359e+00 4.6475e-01 16.8604 < 2.2e-16 ***
bottom25_sold_price -7.9381e+04 7.9732e+02 -99.5596 < 2.2e-16 ***
data_factor$infections_3mma:bottom25_sold_price -5.1963e+00 7.5026e-01 -6.9259 4.440e-12 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# Age on Infections
# Violin + box plot of property age by infections_period, drawn horizontally
# via coord_flip().
# The scale_fill_viridis() call of the original is removed: it was replaced by
# the scale_fill_manual() added later in the same chain, so it had no effect
# on the plot and only triggered ggplot2's "Scale for 'fill' is already
# present" message.
ggplot(
  data_factor,
  aes(x = infections_period, y = age, fill = infections_period)
) +
  geom_violin(alpha = 0.5) +
  geom_boxplot(width = 0.1) +
  coord_flip() +
  theme_ipsum() +
  theme(
    legend.position = "none",
    plot.title = element_text(size = 14)
  ) +
  ggtitle("Comparison of Age") +
  xlab("Infection Period") +
  ylab("Age of Property") +
  scale_fill_manual(
    values = c(very_low, med),
    name = "Infection Period",
    labels = c("Pre", "Post")
  )
Scale for 'fill' is already present. Adding another scale for 'fill', which will replace the existing scale.
# Coefficient tests for lm_corona_age_bottom using an HC0
# heteroskedasticity-consistent covariance matrix. `vcov.` is spelled out in
# full; the original `vcov =` only reached it through partial argument
# matching.
# NOTE(review): `method = "White2"` is kept from the original call; if
# lm_corona_age_bottom is a plain lm fit, vcovHC() presumably ignores it -
# confirm and drop.
coeftest(
  lm_corona_age_bottom,
  vcov. = vcovHC(lm_corona_age_bottom, method = "White2", type = "HC0")
)
t test of coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 1.2697e+05 3.1235e+04 4.0649 4.821e-05 ***
ac_typenone -4.4551e+04 1.9540e+03 -22.8002 < 2.2e-16 ***
ac_typenot_central -1.4115e+04 1.5258e+03 -9.2508 < 2.2e-16 ***
patio1 8.8898e+03 7.5883e+02 11.7151 < 2.2e-16 ***
school_general1 1.0875e+04 1.0179e+03 10.6833 < 2.2e-16 ***
photo_count 8.7047e+02 4.7073e+01 18.4919 < 2.2e-16 ***
pool1 8.6980e+03 1.3425e+03 6.4787 9.426e-11 ***
roof_typeother 3.2938e+03 1.4117e+03 2.3331 0.0196485 *
roof_typeshingle 2.1981e+04 1.6190e+03 13.5770 < 2.2e-16 ***
roof_typeslate 7.4667e+03 8.7967e+03 0.8488 0.3959985
gas_typenatural -9.3517e+04 3.5479e+03 -26.3584 < 2.2e-16 ***
gas_typenone -1.2593e+05 2.5036e+03 -50.3013 < 2.2e-16 ***
gas_typepropane -9.3374e+04 1.8322e+04 -5.0962 3.491e-07 ***
gas_typeunknown -1.2906e+05 2.4005e+03 -53.7657 < 2.2e-16 ***
out_building1 -6.3411e+03 8.0363e+02 -7.8906 3.132e-15 ***
land_acres 3.7274e+03 9.3788e+02 3.9743 7.080e-05 ***
appliances1 2.5344e+04 1.1141e+03 22.7473 < 2.2e-16 ***
garage1 1.3975e+04 7.4840e+02 18.6729 < 2.2e-16 ***
property_conditionnew -5.0567e+03 6.2096e+03 -0.8143 0.4154609
property_conditionother -2.0425e+04 9.3375e+02 -21.8744 < 2.2e-16 ***
energy_efficient1 1.5099e+04 8.2260e+02 18.3556 < 2.2e-16 ***
exterior_typemetal -2.5826e+02 2.3574e+03 -0.1096 0.9127626
exterior_typeother 1.2023e+04 1.0621e+03 11.3193 < 2.2e-16 ***
exterior_typevinyl 5.5811e+03 1.0999e+03 5.0740 3.924e-07 ***
exterior_typewood 3.2842e+03 1.7401e+03 1.8874 0.0591170 .
exterior_featurescourtyard 4.5132e+04 1.5039e+04 3.0009 0.0026945 **
exterior_featuresfence -1.4935e+04 4.9711e+03 -3.0044 0.0026641 **
exterior_featuresnone -7.0656e+03 4.9869e+03 -1.4168 0.1565455
exterior_featuresporch -1.2793e+04 5.0376e+03 -2.5394 0.0111099 *
exterior_featurestennis_court 1.9681e+04 1.0734e+04 1.8335 0.0667358 .
fireplace1 1.2147e+04 8.0986e+02 14.9993 < 2.2e-16 ***
foundation_typeslab 1.3287e+04 1.2600e+03 10.5449 < 2.2e-16 ***
foundation_typeunspecified 7.2193e+03 1.4077e+03 5.1285 2.943e-07 ***
beds_total1 -2.4876e+04 2.7454e+04 -0.9061 0.3649013
beds_total2 -2.7848e+04 2.7266e+04 -1.0214 0.3070895
beds_total3 -2.5167e+04 2.7263e+04 -0.9231 0.3559582
beds_total4 -2.0523e+04 2.7292e+04 -0.7520 0.4520777
beds_total5 -3.6440e+04 2.7717e+04 -1.3147 0.1886165
bath_full1 -3.8425e+04 2.3952e+04 -1.6043 0.1086671
bath_full2 -1.3304e+04 2.3946e+04 -0.5556 0.5784882
bath_full3 6.7324e+03 2.4019e+04 0.2803 0.7792514
bath_full4 1.1122e+03 2.9704e+04 0.0374 0.9701332
bath_full6 -9.3526e+03 2.4547e+04 -0.3810 0.7031968
bath_half1 1.0957e+04 1.0886e+03 10.0654 < 2.2e-16 ***
bath_half2 3.0347e+04 6.6151e+03 4.5875 4.508e-06 ***
bath_half3 6.3102e+04 9.8354e+03 6.4158 1.427e-10 ***
bath_half4 8.8086e+04 3.1342e+03 28.1044 < 2.2e-16 ***
bath_half5 -5.2038e+04 2.5797e+04 -2.0172 0.0436870 *
dom -2.3526e+01 6.3618e+00 -3.6980 0.0002178 ***
sold_date 1.2018e+00 4.5403e-01 2.6470 0.0081272 **
sewer_typeseptic -6.1146e+03 1.4223e+03 -4.2992 1.721e-05 ***
sewer_typeunspecified -3.7371e+03 7.3964e+02 -5.0526 4.389e-07 ***
property_stylenot_mobile 6.9362e+04 1.7331e+03 40.0209 < 2.2e-16 ***
subdivision1 3.5289e+03 9.0359e+02 3.9054 9.432e-05 ***
water_typewell -5.1055e+02 3.9561e+03 -0.1291 0.8973140
waterfront1 2.0044e+04 1.4711e+03 13.6256 < 2.2e-16 ***
bottom25_dom1 1.0831e+04 9.7524e+02 11.1063 < 2.2e-16 ***
area_living_2 1.6847e-02 4.0061e-04 42.0532 < 2.2e-16 ***
data_factor$infections_3mma 8.6802e+00 6.9100e-01 12.5617 < 2.2e-16 ***
bottom25_age 2.5461e+04 9.3603e+02 27.2015 < 2.2e-16 ***
data_factor$infections_3mma:bottom25_age 7.9891e-01 8.6329e-01 0.9254 0.3547531
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# Price per unit of living area, compared across infection periods
ggplot(
  data_factor,
  aes(x = infections_period, y = sold_price / area_living, fill = infections_period)
) +
  geom_violin(alpha = 0.5) +
  geom_boxplot(width = 0.1) +
  # One fill scale only: a scale_fill_viridis() call used to precede this one,
  # but ggplot2 replaced it with the manual scale and emitted a
  # "Scale for 'fill' is already present" message.
  scale_fill_manual(values = c(very_low, med)) +
  # Scale limits discard observations outside [0, 250] before the violin and
  # boxplot statistics are computed.
  scale_y_continuous(limits = c(0, 250)) +
  coord_flip() +
  theme_ipsum() +
  # Must come after theme_ipsum() so these settings are not overwritten.
  theme(
    legend.position = "none",
    plot.title = element_text(size = 11)
  ) +
  ggtitle("Comparison of Living Area per Sqft.") +
  xlab("Infection Period") +
  ylab("Price per Living Area")
Scale for 'fill' is already present. Adding another scale for 'fill', which will replace the existing scale.
Warning: Removed 68 rows containing non-finite values (stat_ydensity).
Warning: Removed 68 rows containing non-finite values (stat_boxplot).
# Coefficient tests for lm_corona_area_living_top using an HC0
# heteroskedasticity-consistent covariance matrix.
coeftest(
  lm_corona_area_living_top,
  vcov. = vcovHC(lm_corona_area_living_top, method = "White2", type = "HC0")
)
t test of coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 2.1786e+05 3.1676e+04 6.8776 6.232e-12 ***
ac_typenone -4.5055e+04 1.9559e+03 -23.0347 < 2.2e-16 ***
ac_typenot_central -1.4868e+04 1.5495e+03 -9.5953 < 2.2e-16 ***
patio1 9.0080e+03 7.7344e+02 11.6466 < 2.2e-16 ***
school_general1 8.5012e+03 1.0371e+03 8.1972 2.580e-16 ***
photo_count 1.0514e+03 4.8373e+01 21.7341 < 2.2e-16 ***
pool1 1.1293e+04 1.3644e+03 8.2768 < 2.2e-16 ***
roof_typeother 4.4255e+03 1.4022e+03 3.1562 0.001601 **
roof_typeshingle 2.2060e+04 1.6174e+03 13.6392 < 2.2e-16 ***
roof_typeslate 7.5155e+03 8.9239e+03 0.8422 0.399699
gas_typenatural -1.0170e+05 3.5575e+03 -28.5866 < 2.2e-16 ***
gas_typenone -1.3662e+05 2.4951e+03 -54.7537 < 2.2e-16 ***
gas_typepropane -1.0682e+05 1.7037e+04 -6.2695 3.682e-10 ***
gas_typeunknown -1.3645e+05 2.3853e+03 -57.2035 < 2.2e-16 ***
out_building1 -4.6203e+03 8.2442e+02 -5.6043 2.114e-08 ***
land_acres 5.3460e+03 9.4296e+02 5.6694 1.449e-08 ***
appliances1 2.4211e+04 1.1233e+03 21.5532 < 2.2e-16 ***
garage1 1.4565e+04 7.6157e+02 19.1246 < 2.2e-16 ***
property_conditionnew -1.8401e+04 6.3472e+03 -2.8992 0.003745 **
property_conditionother -1.9911e+04 9.6101e+02 -20.7193 < 2.2e-16 ***
energy_efficient1 1.4472e+04 8.3430e+02 17.3469 < 2.2e-16 ***
exterior_typemetal -1.9967e+03 2.3583e+03 -0.8466 0.397202
exterior_typeother 1.1470e+04 1.0702e+03 10.7168 < 2.2e-16 ***
exterior_typevinyl 3.3346e+03 1.1187e+03 2.9807 0.002878 **
exterior_typewood 1.4095e+03 1.7583e+03 0.8016 0.422795
exterior_featurescourtyard 4.3545e+04 1.4388e+04 3.0266 0.002476 **
exterior_featuresfence -1.3817e+04 5.1863e+03 -2.6641 0.007724 **
exterior_featuresnone -6.5726e+03 5.2004e+03 -1.2639 0.206292
exterior_featuresporch -1.3384e+04 5.2569e+03 -2.5460 0.010902 *
exterior_featurestennis_court 2.0086e+04 1.1798e+04 1.7025 0.088672 .
fireplace1 1.9245e+04 7.9863e+02 24.0978 < 2.2e-16 ***
foundation_typeslab 1.3112e+04 1.2770e+03 10.2682 < 2.2e-16 ***
foundation_typeunspecified 7.1261e+03 1.4117e+03 5.0479 4.499e-07 ***
beds_total1 -2.4323e+04 2.8620e+04 -0.8499 0.395408
beds_total2 -2.0505e+04 2.8451e+04 -0.7207 0.471080
beds_total3 -8.6578e+03 2.8444e+04 -0.3044 0.760845
beds_total4 2.7001e+03 2.8461e+04 0.0949 0.924420
beds_total5 -4.1571e+03 2.8867e+04 -0.1440 0.885494
bath_full1 -6.0233e+04 2.5477e+04 -2.3642 0.018076 *
bath_full2 -2.2302e+04 2.5477e+04 -0.8754 0.381375
bath_full3 7.4722e+03 2.5549e+04 0.2925 0.769934
bath_full4 1.4402e+04 3.1333e+04 0.4596 0.645772
bath_full6 -2.8661e+04 2.6137e+04 -1.0965 0.272850
bath_half1 1.6583e+04 1.0888e+03 15.2302 < 2.2e-16 ***
bath_half2 4.0757e+04 6.9462e+03 5.8675 4.482e-09 ***
bath_half3 7.1768e+04 1.0544e+04 6.8065 1.023e-11 ***
bath_half4 7.0281e+04 3.5280e+03 19.9209 < 2.2e-16 ***
bath_half5 -4.1415e+04 4.2895e+04 -0.9655 0.334309
age -1.9822e+03 7.9428e+01 -24.9554 < 2.2e-16 ***
dom -1.5428e+01 6.4584e+00 -2.3888 0.016913 *
sold_date 6.1595e-01 4.7569e-01 1.2949 0.195377
sewer_typeseptic -6.0698e+03 1.4378e+03 -4.2215 2.436e-05 ***
sewer_typeunspecified -4.9915e+03 7.5983e+02 -6.5692 5.161e-11 ***
property_stylenot_mobile 7.3425e+04 1.7430e+03 42.1247 < 2.2e-16 ***
subdivision1 2.8893e+03 9.1343e+02 3.1631 0.001563 **
water_typewell 1.6295e+03 4.0353e+03 0.4038 0.686356
waterfront1 2.0708e+04 1.4983e+03 13.8212 < 2.2e-16 ***
bottom25_dom1 1.1119e+04 9.9811e+02 11.1405 < 2.2e-16 ***
age_2 1.8417e+01 1.0995e+00 16.7510 < 2.2e-16 ***
data_factor$infections_3mma 8.6770e+00 5.9698e-01 14.5349 < 2.2e-16 ***
top25_area_living 3.8537e+04 1.3446e+03 28.6604 < 2.2e-16 ***
data_factor$infections_3mma:top25_area_living 9.7205e-01 1.2004e+00 0.8098 0.418087
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# Conditional mean: average days on market (dom) within each infection period
library(plyr)
dom_mean_data <- ddply(
  .data = data_factor,
  .variables = "infections_period",
  .fun = summarise,
  dom_mean = mean(dom, na.rm = TRUE)
)
# Distribution: days on market for all properties in data_factor
ggplot(data_factor, aes(x = dom)) +
  geom_density(alpha = 0.5, position = "identity", fill = very_low) +
  ggtitle("Days on Market Distribution") +
  # The overall mean is passed as a fixed value so exactly one line is drawn.
  # Mapping it through aes() recycles the value to every row of the data, and
  # without na.rm = TRUE a single missing dom would make the mean NA.
  geom_vline(
    xintercept = mean(data_factor$dom, na.rm = TRUE),
    linetype = "dashed",
    size = 0.4,
    alpha = 0.5,
    color = very_low
  ) +
  xlab("Days on Market") +
  ylab("Density")
# Distribution: days on market, split by infection period
ggplot(data_factor, aes(x = dom, fill = infections_period)) +
  geom_density(alpha = 0.5, position = "identity") +
  ggtitle("Days on Market Distributions") +
  # Group means from dom_mean_data (row 1 and row 2, in the order ddply
  # returned them). They are fixed values, not aesthetics, so each line is
  # drawn once instead of once per row of the summary table.
  geom_vline(
    xintercept = dom_mean_data$dom_mean[2],
    linetype = "dashed", size = 0.5, color = med, alpha = 0.8
  ) +
  geom_vline(
    xintercept = dom_mean_data$dom_mean[1],
    linetype = "dashed", size = 0.5, color = very_low, alpha = 0.8
  ) +
  xlab("Days on Market") +
  ylab("Density") +
  scale_fill_manual(
    values = c(very_low, med),
    name = "Infection Period",
    labels = c("Pre", "Post")
  )
# Distribution: days on market by infection period, one facet row per
# combination of top25_dom and bottom25_dom
ggplot(data_factor, aes(x = dom, fill = infections_period)) +
  geom_density(alpha = 0.5, position = "identity") +
  facet_grid(rows = vars(top25_dom, bottom25_dom), scales = "free") +
  scale_fill_manual(
    name = "Infection Period",
    labels = c("Pre", "Post"),
    values = c(very_low, med)
  ) +
  labs(
    title = "Days on Market Distributions",
    x = "Days on Market",
    y = "Density"
  )
# Days on market compared across infection periods
ggplot(data_factor, aes(x = infections_period, y = dom, fill = infections_period)) +
  geom_violin(alpha = 0.5) +
  geom_boxplot(width = 0.1) +
  # One fill scale only: a preceding scale_fill_viridis() call was always
  # replaced by this manual scale and only produced a
  # "Scale for 'fill' is already present" message.
  scale_fill_manual(
    values = c(very_low, med),
    name = "Infection Period",
    labels = c("Pre", "Post")
  ) +
  # coord_flip() is intentionally disabled for this plot.
  theme_ipsum() +
  # Must come after theme_ipsum() so these settings are not overwritten.
  theme(
    legend.position = "none",
    plot.title = element_text(size = 11)
  ) +
  ggtitle("Comparison of Days on Market") +
  xlab("Infection Period") +
  ylab("Days on Market")
Scale for 'fill' is already present. Adding another scale for 'fill', which will replace the existing scale.
# Coefficient tests for lm_corona_dom_bottom using an HC0
# heteroskedasticity-consistent covariance matrix.
coeftest(
  lm_corona_dom_bottom,
  vcov. = vcovHC(lm_corona_dom_bottom, method = "White2", type = "HC0")
)
t test of coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 1.7107e+05 3.2306e+04 5.2953 1.199e-07 ***
ac_typenone -4.3404e+04 1.9672e+03 -22.0638 < 2.2e-16 ***
ac_typenot_central -1.3244e+04 1.5285e+03 -8.6644 < 2.2e-16 ***
patio1 7.9510e+03 7.5105e+02 10.5866 < 2.2e-16 ***
school_general1 1.0340e+04 1.0042e+03 10.2968 < 2.2e-16 ***
photo_count 9.5789e+02 4.6938e+01 20.4074 < 2.2e-16 ***
pool1 9.9372e+03 1.3281e+03 7.4821 7.561e-14 ***
roof_typeother 2.6563e+03 1.4045e+03 1.8913 0.0585958 .
roof_typeshingle 1.9971e+04 1.6039e+03 12.4519 < 2.2e-16 ***
roof_typeslate 6.1511e+03 8.7277e+03 0.7048 0.4809591
gas_typenatural -1.0026e+05 3.4236e+03 -29.2838 < 2.2e-16 ***
gas_typenone -1.3330e+05 2.4175e+03 -55.1396 < 2.2e-16 ***
gas_typepropane -1.0165e+05 1.7735e+04 -5.7315 1.007e-08 ***
gas_typeunknown -1.3740e+05 2.3153e+03 -59.3429 < 2.2e-16 ***
out_building1 -5.0020e+03 8.0229e+02 -6.2347 4.601e-10 ***
area_living 4.3924e+01 6.0025e+00 7.3176 2.603e-13 ***
land_acres 3.1769e+03 9.3297e+02 3.4052 0.0006623 ***
appliances1 2.4716e+04 1.1047e+03 22.3735 < 2.2e-16 ***
garage1 1.2482e+04 7.4492e+02 16.7561 < 2.2e-16 ***
property_conditionnew -2.2693e+04 6.0403e+03 -3.7569 0.0001725 ***
property_conditionother -2.0434e+04 9.2182e+02 -22.1671 < 2.2e-16 ***
energy_efficient1 1.3977e+04 8.1089e+02 17.2373 < 2.2e-16 ***
exterior_typemetal 1.0138e+02 2.3235e+03 0.0436 0.9651991
exterior_typeother 1.0975e+04 1.0409e+03 10.5441 < 2.2e-16 ***
exterior_typevinyl 5.0221e+03 1.0863e+03 4.6229 3.803e-06 ***
exterior_typewood 2.6632e+03 1.7144e+03 1.5535 0.1203185
exterior_featurescourtyard 4.1749e+04 1.4307e+04 2.9182 0.0035241 **
exterior_featuresfence -1.5150e+04 4.9367e+03 -3.0688 0.0021514 **
exterior_featuresnone -8.7504e+03 4.9532e+03 -1.7666 0.0773066 .
exterior_featuresporch -1.5513e+04 5.0065e+03 -3.0985 0.0019472 **
exterior_featurestennis_court 1.8387e+04 1.0849e+04 1.6947 0.0901406 .
fireplace1 1.1927e+04 8.0873e+02 14.7472 < 2.2e-16 ***
foundation_typeslab 1.3986e+04 1.2569e+03 11.1274 < 2.2e-16 ***
foundation_typeunspecified 8.1005e+03 1.3966e+03 5.8003 6.702e-09 ***
beds_total1 -3.0210e+04 2.6492e+04 -1.1404 0.2541445
beds_total2 -3.8584e+04 2.6387e+04 -1.4622 0.1436894
beds_total3 -3.8581e+04 2.6423e+04 -1.4601 0.1442628
beds_total4 -3.4451e+04 2.6453e+04 -1.3024 0.1928085
beds_total5 -4.9987e+04 2.6861e+04 -1.8609 0.0627692 .
bath_full1 -3.1190e+04 2.3436e+04 -1.3308 0.1832546
bath_full2 -9.6994e+03 2.3424e+04 -0.4141 0.6788250
bath_full3 1.2111e+04 2.3511e+04 0.5151 0.6064690
bath_full4 8.9598e+03 2.9138e+04 0.3075 0.7584715
bath_full6 -1.3950e+04 2.4076e+04 -0.5794 0.5623287
bath_half1 1.1111e+04 1.0808e+03 10.2800 < 2.2e-16 ***
bath_half2 3.1065e+04 6.8469e+03 4.5371 5.730e-06 ***
bath_half3 5.8056e+04 1.1205e+04 5.1814 2.220e-07 ***
bath_half4 8.7758e+04 3.1894e+03 27.5153 < 2.2e-16 ***
bath_half5 -5.6301e+04 2.8378e+04 -1.9839 0.0472744 *
age -1.9126e+03 7.9668e+01 -24.0072 < 2.2e-16 ***
sold_date 2.6907e-01 4.6365e-01 0.5803 0.5616918
sewer_typeseptic -5.6527e+03 1.4179e+03 -3.9865 6.724e-05 ***
sewer_typeunspecified -4.2515e+03 7.3564e+02 -5.7793 7.592e-09 ***
property_stylenot_mobile 6.7594e+04 1.7413e+03 38.8190 < 2.2e-16 ***
subdivision1 3.5261e+03 8.9118e+02 3.9567 7.621e-05 ***
water_typewell 1.3123e+03 3.9007e+03 0.3364 0.7365576
waterfront1 1.9832e+04 1.4576e+03 13.6066 < 2.2e-16 ***
bottom25_dom1 1.3495e+04 9.6775e+02 13.9442 < 2.2e-16 ***
age_2 1.7151e+01 1.1195e+00 15.3204 < 2.2e-16 ***
area_living_2 5.3725e-03 1.7186e-03 3.1262 0.0017730 **
data_factor$infections_3mma 1.0086e+01 7.3559e-01 13.7116 < 2.2e-16 ***
bottom25_dom1:data_factor$infections_3mma -2.1852e+00 8.9042e-01 -2.4541 0.0141291 *
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# Distribution: sold price of all properties
ggplot(data = data_factor, aes(x = sold_price)) +
  # fill, alpha and position are fixed settings, so they are passed as layer
  # arguments. Inside aes() `position` is not an aesthetic (ggplot2 warned
  # "Ignoring unknown aesthetics: position") and alpha/fill were treated as
  # data mappings, which then needed a manual scale and a hidden legend.
  geom_density(fill = very_low, alpha = 0.5, position = "identity") +
  ggtitle("Price Distributions of All Properties") +
  xlab("Sold Price") +
  ylab("Density")
Warning: Ignoring unknown aesthetics: position
# Distribution: City vs non-city
# Conditional mean: average sold price for each level of city_limits
library(plyr)
city_limits_mean_data <- ddply(
  data_factor, "city_limits", summarise,
  city_limits_mean = mean(sold_price, na.rm = TRUE)
)
ggplot(data = data_factor, aes(x = sold_price, fill = city_limits)) +
  geom_density(alpha = 0.5, position = "identity") +
  ggtitle("Price Distributions of City vs Rural") +
  # Group means (rows 1 and 2 of the summary table) as fixed values, so each
  # line is drawn once instead of once per row of the summary table.
  geom_vline(
    xintercept = city_limits_mean_data$city_limits_mean[2],
    linetype = "dashed", size = 0.5, color = med, alpha = 0.8
  ) +
  geom_vline(
    xintercept = city_limits_mean_data$city_limits_mean[1],
    linetype = "dashed", size = 0.5, color = very_low, alpha = 0.8
  ) +
  # The fill variable is city_limits, so the legend is titled accordingly
  # (it previously read "Infection Period").
  scale_fill_manual(
    values = c(very_low, med),
    name = "City Limits",
    labels = c("Rural", "City")
  ) +
  xlab("Sold Price") +
  ylab("Density")
# Conditional mean: average sold price inside city limits, pre vs post corona.
# Computed once here; later plots read city_limits_mean_data.
library(plyr)
city_limits_mean_data <- ddply(
  subset(data_factor, city_limits == 1), "infections_period", summarise,
  city_limits_mean = mean(sold_price, na.rm = TRUE)
)
# Distribution: Just City
ggplot(data = subset(data_factor, city_limits == 1), aes(x = sold_price)) +
  geom_density(alpha = 0.5, position = "identity", fill = very_low) +
  ggtitle("Price Distribution of Properties in City Limits") +
  # Mean sold price of the in-city subset, drawn as a single fixed line.
  # The earlier mean(city_limits) took the mean of a non-numeric column,
  # returned NA, and no line was drawn at all.
  geom_vline(
    xintercept = with(
      subset(data_factor, city_limits == 1),
      mean(sold_price, na.rm = TRUE)
    ),
    linetype = "dashed", size = 0.4, alpha = 0.5
  ) +
  xlab("Sold Price") +
  ylab("Density")
Warning in mean.default(city_limits) :
argument is not numeric or logical: returning NA
Warning: Removed 23399 rows containing missing values (geom_vline).
# Distribution: sold price inside city limits, split by infection period
ggplot(
  data = subset(data_factor, city_limits == 1),
  aes(x = sold_price, fill = infections_period)
) +
  geom_density(alpha = 0.5, position = "identity") +
  ggtitle("Price Distributions of Properties in City Limits") +
  # Per-period means from city_limits_mean_data (rows 1 and 2) as fixed
  # values, so each line is drawn once instead of once per summary row.
  geom_vline(
    xintercept = city_limits_mean_data$city_limits_mean[2],
    linetype = "dashed", size = 0.5, color = med, alpha = 0.8
  ) +
  geom_vline(
    xintercept = city_limits_mean_data$city_limits_mean[1],
    linetype = "dashed", size = 0.5, color = very_low, alpha = 0.8
  ) +
  scale_fill_manual(
    values = c(very_low, med),
    name = "Infection Period",
    labels = c("Pre", "Post")
  ) +
  xlab("Sold Price") +
  ylab("Density")
# Sold price by city limits, with each group split by infection period
ggplot(data_factor, aes(x = city_limits, y = sold_price, fill = infections_period)) +
  geom_violin(alpha = 0.5) +
  geom_boxplot(width = 0.1, alpha = 0.9) +
  # One fill scale only: a preceding scale_fill_viridis() call was always
  # replaced by this manual scale. The fill variable is infections_period,
  # so the scale is named after it (the legend itself is hidden below).
  scale_fill_manual(
    values = c(very_low, med),
    name = "Infection Period",
    labels = c("Pre", "Post")
  ) +
  # coord_flip() is intentionally disabled for this plot.
  theme_ipsum() +
  # Must come after theme_ipsum() so these settings are not overwritten.
  theme(
    legend.position = "none",
    plot.title = element_text(size = 14)
  ) +
  ggtitle("Comparison of Price: City Limits and Pre vs. Post Corona") +
  xlab("City Limits and Infection Period") +
  ylab("Sold Price")
Scale for 'fill' is already present. Adding another scale for 'fill', which will replace the existing scale.
# Testing Corona, City Limits
# Regresses sold_price on every column of data_factor_core_clean plus the
# infection measure, the city-limits indicator and their interaction.
lm_corona_city <- lm(sold_price ~ .
# test variable(s)
# NOTE(review): these columns are read from data_factor, not from the `data`
# argument; this presumably relies on data_factor and data_factor_core_clean
# having the same rows in the same order -- confirm.
+ data_factor$infections_3mma + data_factor$city_limits
# `a*b` in a formula already expands to a + b + a:b, so this line repeats the
# main effects above; lm() keeps each term once.
+ data_factor$infections_3mma*data_factor$city_limits
,data = data_factor_core_clean)
# Coefficient tests with an HC0 heteroskedasticity-consistent covariance matrix.
# NOTE(review): confirm that `method = "White2"` has any effect for an lm fit.
coeftest(lm_corona_city, vcov = vcovHC(lm_corona_city, method = "White2", type = "HC0"))
t test of coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 1.6039e+05 3.3079e+04 4.8487 1.251e-06 ***
property_typeDUP -5.1534e+04 1.4834e+04 -3.4740 0.0005136 ***
property_typeOTH 1.8189e+04 1.2173e+04 1.4942 0.1351256
property_typePAT 1.6010e+04 5.5366e+03 2.8916 0.0038360 **
property_typeSGL 2.1938e+04 2.6434e+03 8.2993 < 2.2e-16 ***
property_typeTNH -3.7855e+03 3.1523e+03 -1.2009 0.2298107
ac_typenone -4.4470e+04 1.9522e+03 -22.7800 < 2.2e-16 ***
ac_typenot_central -1.3891e+04 1.5271e+03 -9.0964 < 2.2e-16 ***
patio1 8.1774e+03 7.4902e+02 10.9175 < 2.2e-16 ***
school_general1 1.0584e+04 1.0117e+03 10.4613 < 2.2e-16 ***
photo_count 9.5668e+02 4.6649e+01 20.5079 < 2.2e-16 ***
pool1 1.2070e+04 1.3348e+03 9.0422 < 2.2e-16 ***
roof_typeother 3.4121e+03 1.4035e+03 2.4312 0.0150581 *
roof_typeshingle 2.0811e+04 1.6018e+03 12.9922 < 2.2e-16 ***
roof_typeslate 6.5989e+03 8.7749e+03 0.7520 0.4520463
gas_typenatural -9.6132e+04 3.4602e+03 -27.7820 < 2.2e-16 ***
gas_typenone -1.2948e+05 2.4153e+03 -53.6095 < 2.2e-16 ***
gas_typepropane -9.5196e+04 1.8080e+04 -5.2651 1.413e-07 ***
gas_typeunknown -1.3466e+05 2.3010e+03 -58.5230 < 2.2e-16 ***
out_building1 -5.8765e+03 7.9985e+02 -7.3470 2.091e-13 ***
area_living 4.3348e+01 5.9670e+00 7.2647 3.852e-13 ***
land_acres 2.6636e+03 9.2703e+02 2.8732 0.0040666 **
appliances1 2.4899e+04 1.1024e+03 22.5853 < 2.2e-16 ***
garage1 1.1877e+04 7.4397e+02 15.9649 < 2.2e-16 ***
property_conditionnew -1.9899e+04 5.8217e+03 -3.4181 0.0006317 ***
property_conditionother -2.1005e+04 9.2539e+02 -22.6983 < 2.2e-16 ***
energy_efficient1 1.3781e+04 8.0737e+02 17.0684 < 2.2e-16 ***
exterior_typemetal 3.9234e+01 2.3064e+03 0.0170 0.9864279
exterior_typeother 1.0902e+04 1.0367e+03 10.5161 < 2.2e-16 ***
exterior_typevinyl 4.7668e+03 1.0820e+03 4.4055 1.060e-05 ***
exterior_typewood 2.5348e+03 1.7040e+03 1.4875 0.1368846
exterior_featurescourtyard 3.5349e+04 1.4298e+04 2.4723 0.0134301 *
exterior_featuresfence -2.4675e+04 4.8520e+03 -5.0855 3.693e-07 ***
exterior_featuresnone -1.7867e+04 4.8599e+03 -3.6765 0.0002370 ***
exterior_featuresporch -2.4614e+04 4.9140e+03 -5.0088 5.514e-07 ***
exterior_featurestennis_court 1.0451e+04 1.0610e+04 0.9849 0.3246603
fireplace1 1.1915e+04 8.0574e+02 14.7875 < 2.2e-16 ***
foundation_typeslab 1.5339e+04 1.2542e+03 12.2301 < 2.2e-16 ***
foundation_typeunspecified 8.7877e+03 1.3912e+03 6.3165 2.722e-10 ***
beds_total1 -3.1675e+04 2.4993e+04 -1.2674 0.2050325
beds_total2 -4.3482e+04 2.4904e+04 -1.7460 0.0808248 .
beds_total3 -4.9030e+04 2.4965e+04 -1.9640 0.0495468 *
beds_total4 -4.5468e+04 2.5001e+04 -1.8186 0.0689778 .
beds_total5 -6.0599e+04 2.5432e+04 -2.3828 0.0171878 *
bath_full1 -3.1091e+04 2.4048e+04 -1.2929 0.1960699
bath_full2 -7.6113e+03 2.4040e+04 -0.3166 0.7515426
bath_full3 1.5205e+04 2.4126e+04 0.6302 0.5285382
bath_full4 1.1559e+04 2.9693e+04 0.3893 0.6970565
bath_full6 1.8900e+04 2.4828e+04 0.7613 0.4465139
bath_half1 1.2548e+04 1.0814e+03 11.6033 < 2.2e-16 ***
bath_half2 3.1437e+04 6.9326e+03 4.5347 5.795e-06 ***
bath_half3 5.7186e+04 1.2061e+04 4.7416 2.133e-06 ***
bath_half4 8.3107e+04 3.1313e+03 26.5408 < 2.2e-16 ***
bath_half5 -5.6771e+04 2.8810e+04 -1.9705 0.0487890 *
age -1.9113e+03 7.9957e+01 -23.9041 < 2.2e-16 ***
dom -2.2953e+01 6.3300e+00 -3.6260 0.0002884 ***
sold_date 2.7729e-01 4.5860e-01 0.6046 0.5454273
sewer_typeseptic -5.6058e+03 1.4254e+03 -3.9327 8.423e-05 ***
sewer_typeunspecified -4.1920e+03 7.3928e+02 -5.6703 1.442e-08 ***
property_stylenot_mobile 6.8072e+04 1.7313e+03 39.3185 < 2.2e-16 ***
subdivision1 3.9045e+03 8.8841e+02 4.3949 1.113e-05 ***
water_typewell 4.8366e+03 4.0156e+03 1.2044 0.2284313
waterfront1 1.9906e+04 1.4521e+03 13.7081 < 2.2e-16 ***
bottom25_dom1 1.0916e+04 9.6228e+02 11.3434 < 2.2e-16 ***
age_2 1.6963e+01 1.1259e+00 15.0670 < 2.2e-16 ***
area_living_2 5.2117e-03 1.7073e-03 3.0525 0.0022715 **
data_factor$infections_3mma 4.2364e+00 1.6257e+00 2.6059 0.0091701 **
data_factor$city_limits1 6.3803e+03 2.1725e+03 2.9369 0.0033181 **
data_factor$infections_3mma:data_factor$city_limits1 5.1668e+00 1.6313e+00 3.1673 0.0015405 **
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# Table output: restrict the table to the intercept and the
# infection / city-limits terms of lm_corona_city
tab_model(
  lm_corona_city,
  terms = c(
    "(Intercept)",
    "data_factor$infections_3mma",
    "data_factor$city_limits1",
    "data_factor$infections_3mma:data_factor$city_limits1"
  ),
  collapse.ci = TRUE,
  auto.label = TRUE
)
# packages
# library() stops with an error when a package is missing; require() would
# only return FALSE and let the script fail later.
library(ggplot2)
library(maps)
# ggmap provides geocode(); install it only when it is not already available.
# (A former `install.packages(Geoc)` call referenced an undefined object and
# was removed.)
if (!requireNamespace("ggmap", quietly = TRUE)) {
  install.packages("ggmap")
}

# Basic map
LA <- map_data("state", region = "louisiana")
ggplot(LA, aes(x = long, y = lat)) +
  geom_polygon()

# data
salesCalls <- data.frame(
  State = rep("louisiana", 5),
  City = c("Baton Rouge", "New Orleans", "Shreveport", "Lafayette", "Mandeville"),
  Calls = c(10, 5, 8, 13, 2)
)
# Prepend the geocoded coordinates (the lon/lat columns used below).
# NOTE(review): ggmap::geocode() presumably calls an external geocoding
# service and may require a registered API key -- confirm before running.
salesCalls <- cbind(ggmap::geocode(as.character(salesCalls$City)), salesCalls)

ggplot(LA, aes(x = long, y = lat)) +
  geom_polygon() +
  coord_map() +
  geom_point(
    data = salesCalls,
    aes(x = lon, y = lat, size = Calls),
    color = "orange"
  )
library(boot) # K-fold
library(leaps) # Subset
library(glmnet) # glmnet() needs an x matrix and a y vector

# Set x-y definitions for glmnet package.
# Both are taken from the same model frame, so rows dropped because of
# missing values are dropped from x and y alike. Truncating y by position
# (e.g. y[1:24653]) only matches the lengths; it pairs responses with the
# wrong predictor rows whenever a dropped row is not at the very end.
lasso_frame <- model.frame(sold_price ~ ., data = data_factor_core_clean)
x <- model.matrix(attr(lasso_frame, "terms"), data = lasso_frame)[, -1]
y <- model.response(lasso_frame)
stopifnot(nrow(x) == length(y))

# General grid: lambda from exp(10) (heavy penalty) down to exp(-65)
# (effectively unpenalised)
grid <- exp(seq(10, -65, length = 101))

# Lasso (alpha = 1) with 10-fold cross-validation
set.seed(1)
cv.out <- cv.glmnet(x, y, alpha = 1, lambda = grid, nfolds = 10)
plot(cv.out)

# Base decision: lambda with the minimum cross-validated error
bestlam <- cv.out$lambda.min
bestlam
log(bestlam)
out <- cv.out$glmnet.fit
lasso.coef <- predict(out, type = "coefficients", s = bestlam)
lasso.coef
lasso.coef[lasso.coef != 0]
# L1 norm of the slope coefficients. The intercept (row 1) is not penalised
# and is excluded; all remaining rows are used rather than a hard-coded
# range, so the result stays correct when the number of predictors changes.
sum(abs(lasso.coef[-1, ]))

# +1se decision: largest lambda within one standard error of the minimum
bestlam2 <- cv.out$lambda.1se
bestlam2
log(bestlam2)
lasso.coef2 <- predict(out, type = "coefficients", s = bestlam2)
lasso.coef2
lasso.coef2[lasso.coef2 != 0]
sum(abs(lasso.coef2[-1, ])) # L1 norm, intercept excluded as above
# 2D kernel density of sold price against the infection measure, shown as a
# surface. The variables live in data_factor; MASS::geyser (left over from
# the plotly example) has neither column. Rows with a missing value in either
# variable are dropped first because kde2d() cannot handle NAs.
kd <- with(
  na.omit(data_factor[, c("sold_price", "infections_3mma")]),
  MASS::kde2d(sold_price, infections_3mma, n = 50)
)
fig <- plot_ly(x = kd$x, y = kd$y, z = kd$z) %>% add_surface()
fig
# Correlation Matrix heatmap
# Get numeric variables.
# bath_full is converted with an assignment (`<-`); the previous `<` only
# compared the column with itself and changed nothing. Going through
# as.character() keeps the actual counts if the column is a factor, where a
# plain as.numeric() would return the internal level codes.
data_factor$bath_full <- as.numeric(as.character(data_factor$bath_full))
num_vars <- data_factor %>% dplyr::select(where(is.numeric))
num_vars <- subset(num_vars, select = -c(top50_sold_price))

# Corr matrix. Pairwise-complete observations keep a few missing values from
# turning whole rows and columns of the matrix into NA.
cormat <- round(cor(num_vars, use = "pairwise.complete.obs"), 2)
head(cormat)

# Long format (Var1, Var2, value) for geom_tile()
melted_cormat <- melt(cormat)
head(melted_cormat)

ggplot(data = melted_cormat, aes(x = Var1, y = Var2, fill = value)) +
  geom_tile() +
  scale_fill_gradient2(
    low = very_low,
    high = high,
    mid = med,
    midpoint = 0,
    limits = c(-1, 1), # full argument name instead of the partial `limit`
    space = "Lab",
    name = "Correlation"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, vjust = 1, size = 10, hjust = 1, color = "#2E2E2E"),
    axis.text.y = element_text(angle = 0, vjust = 1, size = 10, hjust = 1, color = "#2E2E2E")
  ) +
  coord_fixed() +
  labs(
    title = "Correlation Matrix",
    x = "",
    y = ""
  )
# Distribution: Total
# Twelve univariate density panels (a to l) arranged in a 4 x 3 grid.

# Adds the shared density layer, axis labels and bare y axis to a base plot.
.density_panel <- function(base_plot, x_label) {
  base_plot +
    geom_density(alpha = 0.5, position = "identity", fill = very_low) +
    xlab(x_label) +
    ylab("") +
    theme(
      axis.text.y = element_blank(),
      axis.ticks.y = element_blank(),
      text = element_text(size = 10)
    )
}

# Converts a count stored as a factor to the numbers shown in its labels.
# A plain as.numeric() on a factor returns the internal level codes instead.
.count_to_numeric <- function(values) {
  as.numeric(as.character(values))
}

a <- .density_panel(ggplot(data_factor, aes(x = sold_price / 1000)), "Sold Price")
b <- .density_panel(ggplot(data_factor, aes(x = list_price / 1000)), "List Price")
c <- .density_panel(ggplot(data_factor, aes(x = area_living)), "Living Area")
d <- .density_panel(ggplot(data_factor, aes(x = land_acres)), "Land in Acres")
e <- .density_panel(ggplot(data_factor, aes(x = area_total)), "Total Area")
f <- .density_panel(ggplot(data_factor, aes(x = age)), "Age")
g <- .density_panel(ggplot(data_factor, aes(x = dom)), "DOM")

# sold_date must be a Date for scale_x_date() below.
data_factor$sold_date <- as.Date(data_factor$sold_date)
str(data_factor)
h <- .density_panel(ggplot(data_factor, aes(x = sold_date)), "Sold Date") +
  scale_x_date(date_labels = "%Y")

# Only observations with more than one daily infection are shown.
i <- .density_panel(
  ggplot(subset(data_factor, infections_daily > 1), aes(x = infections_daily)),
  "Infections Daily"
)

# Room counts are converted in place so the density is drawn on the real
# counts. (A fill scale was previously added to these three plots; it had no
# effect because fill is a fixed colour, not a mapping.)
data_factor$beds_total <- .count_to_numeric(data_factor$beds_total)
j <- .density_panel(ggplot(data_factor, aes(x = beds_total)), "Number of Bedrooms")

data_factor$bath_full <- .count_to_numeric(data_factor$bath_full)
k <- .density_panel(ggplot(data_factor, aes(x = bath_full)), "Number of Full Bathrooms")

data_factor$bath_half <- .count_to_numeric(data_factor$bath_half)
l <- .density_panel(ggplot(data_factor, aes(x = bath_half)), "Number of Half Bathrooms")

gridExtra::grid.arrange(a, b, c, d, e, f, g, h, i, j, k, l, nrow = 4, ncol = 3)
# Sold price on pool, infection period and their interaction.
# `pool * infections_period` expands to both main effects plus pool:infections_period.
lm_ucla <- lm(
  sold_price ~ pool * infections_period,
  data = data_factor
)
summ(lm_ucla)

# Load the sj* packages, then render the regression table
library(sjPlot)
library(sjmisc)
library(sjlabelled)
tab_model(lm_ucla)
end of document